########## DiD analyses (main text) #############

## This code replicates all results figures from section 5 of the main text
## These use DiD to estimate the impact of entering coverage on incumbent vote/irregularities

## Load required packages

rm(list=ls())

library(fixest)
library(tidyverse)
library(ggplot2)

#### Load elections data

did_data_pres <- readRDS("did_election_data_pres.rds")

### Write function to run each individual regression

did_model <- function(outcome, fixed_effect, include_streams = FALSE, include_weights = FALSE) {
  
  # Define the base formula
  formula <- as.formula(
    paste0(outcome, " ~ inside", if (include_streams) " + i(streams_raw)" else "", " | ", fixed_effect)
  )
  
  # Run the model with specified arguments
  model <- feols(
    formula,
    data = did_data_pres,
    cluster = ~ps_id,
    weights = if (include_weights) ~weights else NULL
  )
  
  return(model)
}

### Run models and store in a data.frame 

models <- list(
  
  ### DPP VOTE SHARE
  
  ## Baseline
  dpp1.1pres = did_model("dpp_share", "ps_id + election"),
  dpp1.2pres = did_model("dpp_share", "ward + election"),
  dpp1.3pres = did_model("dpp_share", "constituency + election"),
  
  ## Matching
  
  dpp2.1pres = did_model("dpp_share", "ps_id + election", include_weights = TRUE),
  dpp2.2pres = did_model("dpp_share", "ward + election", include_weights = TRUE),
  dpp2.3pres = did_model("dpp_share", "constituency + election", include_weights = TRUE),
  
  ## Baseline with streams
  
  dpp3.1pres = did_model("dpp_share", "ps_id + election", include_streams = TRUE),
  dpp3.2pres = did_model("dpp_share", "ward + election", include_streams = TRUE),
  dpp3.3pres = did_model("dpp_share", "constituency + election", include_streams = TRUE),
  
  ## Matching with streams
  dpp4.1pres = did_model("dpp_share", "ps_id + election", 
                             include_streams = TRUE, include_weights = TRUE),
  dpp4.2pres = did_model("dpp_share", "ward + election", 
                             include_streams = TRUE, include_weights = TRUE),
  dpp4.3pres = did_model("dpp_share", "constituency + election", 
                             include_streams = TRUE, include_weights = TRUE),
  
  ### BALLOT REJECTION RATE
  
  ## Baseline
  rej1.1pres = did_model("rejected_share", "ps_id + election"),
  rej1.2pres = did_model("rejected_share", "ward + election"),
  rej1.3pres = did_model("rejected_share", "constituency + election"),
  
  ## Matching
  
  rej2.1pres = did_model("rejected_share", "ps_id + election", include_weights = TRUE),
  rej2.2pres = did_model("rejected_share", "ward + election", include_weights = TRUE),
  rej2.3pres = did_model("rejected_share", "constituency + election", include_weights = TRUE),
  
  ## Baseline with streams
  
  rej3.1pres = did_model("rejected_share", "ps_id + election", include_streams = TRUE),
  rej3.2pres = did_model("rejected_share", "ward + election", include_streams = TRUE),
  rej3.3pres = did_model("rejected_share", "constituency + election", include_streams = TRUE),
  
  ## Matching with streams
  rej4.1pres = did_model("rejected_share", "ps_id + election", 
                             include_streams = TRUE, include_weights = TRUE),
  rej4.2pres = did_model("rejected_share", "ward + election", 
                             include_streams = TRUE, include_weights = TRUE),
  rej4.3pres = did_model("rejected_share", "constituency + election", 
                             include_streams = TRUE, include_weights = TRUE)

  )

## Save results in a data.frame 

models_tidy <- lapply(models, broom::tidy)

did_results <- bind_rows(models_tidy, .id = "model_name") %>%
  filter(term == "inside") %>%
  mutate(conf.low95 = estimate - 1.96*std.error,
         conf.high95 = estimate + 1.96*std.error,
         conf.low90 = estimate - 1.68*std.error,
         conf.high90 = estimate + 1.68*std.error,
         spec = rep(rep(c("Baseline", 
                          "Matching", 
                          "Baseline\n+ Streams", 
                          "Matching\n+ Streams"), each = 3), 2), 
         FEs = c(rep(c("Polling station", 
                       "Ward", 
                       "Constituency"), 8)),
         outcome = rep(c("DPP vote share", "Ballot rejection rate"), each = 12)) %>%
  mutate(FEs = factor(FEs, levels = 
                        c("Polling station", 
                          "Ward", 
                          "Constituency")))


####### FIGURE 4 main text (ruling party vote share) ########

did_dpp <- did_results %>% filter(outcome == "DPP vote share")

ggplot(data=did_dpp, aes(x = estimate, 
                             y = spec)) +
  theme(panel.border = element_rect(colour = "black", fill = NA)) +
  geom_vline(xintercept=0, linetype=2) +
  geom_errorbarh(aes(xmin=conf.low90, xmax=conf.high90, height=0, width=0, 
                     col=FEs), size=0.7,
                 position = position_dodge(width=0.3)) +
  geom_errorbarh(aes(xmin=conf.low95, xmax=conf.high95, height=0, width=0, 
                     col=FEs), alpha=0.7,
                 position = position_dodge(width=0.3)) +
  geom_point(position = position_dodge(width=0.3), 
             aes(col=FEs)) +
  theme_bw() +
  theme(legend.position = "bottom", 
        axis.title.x = element_blank()) +
  ggtitle("Ruling party vote share declines as stations enter mobile internet coverage") +
  coord_flip() +
  scale_colour_manual(values = c("grey15", "darkred", "darkblue")) +
  xlab("ATT\n(% change in ruling party share)")



###### FIGURE 5 main text (ballot rejection rate) #########

did_reject <- did_results %>% filter(outcome == "Ballot rejection rate")

ggplot(data=did_reject, aes(x = estimate, 
                         y = spec)) +
  theme(panel.border = element_rect(colour = "black", fill = NA)) +
  geom_vline(xintercept=0, linetype=2) +
  geom_errorbarh(aes(xmin=conf.low90, xmax=conf.high90, height=0, width=0, 
                     col=FEs), size=0.7,
                 position = position_dodge(width=0.3)) +
  geom_errorbarh(aes(xmin=conf.low95, xmax=conf.high95, height=0, width=0, 
                     col=FEs), alpha=0.7,
                 position = position_dodge(width=0.3)) +
  geom_point(position = position_dodge(width=0.3), 
             aes(col=FEs)) +
  theme_bw() +
  theme(legend.position = "bottom", 
        axis.title.x = element_blank()) +
  ggtitle("Partial evidence that coverage reduces ballot rejections") +
  coord_flip() +
  scale_colour_manual(values = c("grey15", "darkred", "darkblue")) +
  xlab("ATT\n(% change in ballot rejection rate)")

